{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy import genfromtxt\n",
    "import matplotlib.pyplot as plt  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[     nan      nan      nan      nan      nan      nan      nan      nan]\n",
      " [     nan   83.     234.289  235.6    159.     107.608 1947.      60.323]\n",
      " [     nan   88.5    259.426  232.5    145.6    108.632 1948.      61.122]\n",
      " [     nan   88.2    258.054  368.2    161.6    109.773 1949.      60.171]\n",
      " [     nan   89.5    284.599  335.1    165.     110.929 1950.      61.187]\n",
      " [     nan   96.2    328.975  209.9    309.9    112.075 1951.      63.221]\n",
      " [     nan   98.1    346.999  193.2    359.4    113.27  1952.      63.639]\n",
      " [     nan   99.     365.385  187.     354.7    115.094 1953.      64.989]\n",
      " [     nan  100.     363.112  357.8    335.     116.219 1954.      63.761]\n",
      " [     nan  101.2    397.469  290.4    304.8    117.388 1955.      66.019]\n",
      " [     nan  104.6    419.18   282.2    285.7    118.734 1956.      67.857]\n",
      " [     nan  108.4    442.769  293.6    279.8    120.445 1957.      68.169]\n",
      " [     nan  110.8    444.546  468.1    263.7    121.95  1958.      66.513]\n",
      " [     nan  112.6    482.704  381.3    255.2    123.366 1959.      68.655]\n",
      " [     nan  114.2    502.601  393.1    251.4    125.368 1960.      69.564]\n",
      " [     nan  115.7    518.173  480.6    257.2    127.852 1961.      69.331]\n",
      " [     nan  116.9    554.894  400.7    282.7    130.081 1962.      70.551]]\n"
     ]
    }
   ],
   "source": [
    "# 读入数据 \n",
    "data = genfromtxt(r\"longley.csv\",delimiter=',')\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 234.289  235.6    159.     107.608 1947.      60.323]\n",
      " [ 259.426  232.5    145.6    108.632 1948.      61.122]\n",
      " [ 258.054  368.2    161.6    109.773 1949.      60.171]\n",
      " [ 284.599  335.1    165.     110.929 1950.      61.187]\n",
      " [ 328.975  209.9    309.9    112.075 1951.      63.221]\n",
      " [ 346.999  193.2    359.4    113.27  1952.      63.639]\n",
      " [ 365.385  187.     354.7    115.094 1953.      64.989]\n",
      " [ 363.112  357.8    335.     116.219 1954.      63.761]\n",
      " [ 397.469  290.4    304.8    117.388 1955.      66.019]\n",
      " [ 419.18   282.2    285.7    118.734 1956.      67.857]\n",
      " [ 442.769  293.6    279.8    120.445 1957.      68.169]\n",
      " [ 444.546  468.1    263.7    121.95  1958.      66.513]\n",
      " [ 482.704  381.3    255.2    123.366 1959.      68.655]\n",
      " [ 502.601  393.1    251.4    125.368 1960.      69.564]\n",
      " [ 518.173  480.6    257.2    127.852 1961.      69.331]\n",
      " [ 554.894  400.7    282.7    130.081 1962.      70.551]]\n",
      "[[ 83. ]\n",
      " [ 88.5]\n",
      " [ 88.2]\n",
      " [ 89.5]\n",
      " [ 96.2]\n",
      " [ 98.1]\n",
      " [ 99. ]\n",
      " [100. ]\n",
      " [101.2]\n",
      " [104.6]\n",
      " [108.4]\n",
      " [110.8]\n",
      " [112.6]\n",
      " [114.2]\n",
      " [115.7]\n",
      " [116.9]]\n"
     ]
    }
   ],
   "source": [
    "# 切分数据\n",
    "x_data = data[1:,2:]\n",
    "y_data = data[1:,1,np.newaxis]\n",
    "print(x_data)\n",
    "print(y_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(16, 6)\n",
      "(16, 1)\n",
      "(16, 7)\n"
     ]
    }
   ],
   "source": [
    "print(np.mat(x_data).shape)\n",
    "print(np.mat(y_data).shape)\n",
    "# 给样本添加偏置项\n",
    "X_data = np.concatenate((np.ones((16,1)),x_data),axis=1)\n",
    "print(X_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1.00000e+00 2.34289e+02 2.35600e+02 1.59000e+02 1.07608e+02 1.94700e+03\n",
      "  6.03230e+01]\n",
      " [1.00000e+00 2.59426e+02 2.32500e+02 1.45600e+02 1.08632e+02 1.94800e+03\n",
      "  6.11220e+01]\n",
      " [1.00000e+00 2.58054e+02 3.68200e+02 1.61600e+02 1.09773e+02 1.94900e+03\n",
      "  6.01710e+01]]\n"
     ]
    }
   ],
   "source": [
    "print(X_data[:3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 岭回归标准方程法求解回归参数\n",
    "def weights(xArr, yArr, lam=0.2):\n",
    "    xMat = np.mat(xArr)\n",
    "    yMat = np.mat(yArr)\n",
    "    xTx = xMat.T*xMat # 矩阵乘法\n",
    "    rxTx = xTx + np.eye(xMat.shape[1])*lam\n",
    "    # 计算矩阵的值,如果值为0，说明该矩阵没有逆矩阵\n",
    "    if np.linalg.det(rxTx) == 0.0:\n",
    "        print(\"This matrix cannot do inverse\")\n",
    "        return\n",
    "    # xTx.I为xTx的逆矩阵\n",
    "    ws = rxTx.I*xMat.T*yMat\n",
    "    return ws"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 7.38107363e-04]\n",
      " [ 2.07703836e-01]\n",
      " [ 2.10076376e-02]\n",
      " [ 5.05385441e-03]\n",
      " [-1.59173066e+00]\n",
      " [ 1.10442920e-01]\n",
      " [-2.42280461e-01]]\n"
     ]
    }
   ],
   "source": [
    "ws = weights(X_data,y_data)\n",
    "print(ws)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[ 83.55075226],\n",
       "        [ 86.92588689],\n",
       "        [ 88.09720227],\n",
       "        [ 90.95677622],\n",
       "        [ 96.06951002],\n",
       "        [ 97.81955375],\n",
       "        [ 98.36444357],\n",
       "        [ 99.99814266],\n",
       "        [103.26832266],\n",
       "        [105.03165135],\n",
       "        [107.45224671],\n",
       "        [109.52190685],\n",
       "        [112.91863666],\n",
       "        [113.98357055],\n",
       "        [115.29845063],\n",
       "        [117.64279933]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算预测值\n",
    "np.mat(X_data)*np.mat(ws)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
